In [1]:
# !pip install folium==0.5.0
In [2]:
import pandas as pd
import numpy as np
import folium
import json

folium.__version__
Out[2]:
'0.5.0'
In [3]:
df = pd.read_csv('drive/My Drive/datasets/serie_historica_acumulados.csv',
                encoding='latin-1', skipfooter=5, engine='python')
df.head()
Out[3]:
CCAA FECHA CASOS PCR+ TestAc+ Hospitalizados UCI Fallecidos Recuperados
0 AN 20/2/2020 NaN NaN NaN NaN NaN NaN NaN
1 AR 20/2/2020 NaN NaN NaN NaN NaN NaN NaN
2 AS 20/2/2020 NaN NaN NaN NaN NaN NaN NaN
3 IB 20/2/2020 1.0 NaN NaN NaN NaN NaN NaN
4 CN 20/2/2020 1.0 NaN NaN NaN NaN NaN NaN
In [4]:
df.tail()
Out[4]:
CCAA FECHA CASOS PCR+ TestAc+ Hospitalizados UCI Fallecidos Recuperados
1230 ML 24/4/2020 NaN 108.0 10.0 44.0 3.0 2.0 77.0
1231 MC 24/4/2020 NaN 1468.0 288.0 625.0 105.0 126.0 842.0
1232 NC 24/4/2020 NaN 4627.0 647.0 1937.0 129.0 429.0 1737.0
1233 PV 24/4/2020 NaN 12366.0 1766.0 6375.0 525.0 1212.0 8941.0
1234 RI 24/4/2020 NaN 3821.0 1125.0 1360.0 83.0 311.0 1999.0
In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1235 entries, 0 to 1234
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   CCAA            1235 non-null   object 
 1   FECHA           1235 non-null   object 
 2   CASOS           941 non-null    float64
 3   PCR+            165 non-null    float64
 4   TestAc+         159 non-null    float64
 5   Hospitalizados  895 non-null    float64
 6   UCI             929 non-null    float64
 7   Fallecidos      912 non-null    float64
 8   Recuperados     893 non-null    float64
dtypes: float64(7), object(2)
memory usage: 87.0+ KB
In [6]:
# loading geojson file
geojson_file = 'drive/My Drive/datasets/shapefiles_ccaa_espana.geojson'
with open(geojson_file, 'r') as file:
    geo_data = json.loads(file.read())

# key of regions in the file
geo_regs = [i['properties']['hasc_1'] for i in geo_data['features']]
In [7]:
# getting the max of each column per region, since the data is accumulated 
total_df = df.copy().groupby(['CCAA']).max().reset_index()
# Modefiying region name in dataframe to match geojson file
total_df['CCAA'] = 'ES.' + total_df['CCAA']
total_df.head()
Out[7]:
CCAA FECHA CASOS PCR+ TestAc+ Hospitalizados UCI Fallecidos Recuperados
0 ES.AN 9/4/2020 10426.0 11703.0 1121.0 5715.0 717.0 1131.0 4295.0
1 ES.AR 9/4/2020 4338.0 4922.0 460.0 2372.0 291.0 709.0 1929.0
2 ES.AS 9/4/2020 2096.0 2238.0 308.0 1760.0 132.0 239.0 716.0
3 ES.CB 9/4/2020 1990.0 2071.0 244.0 982.0 78.0 182.0 1046.0
4 ES.CE 9/4/2020 98.0 100.0 25.0 10.0 4.0 4.0 98.0
In [8]:
# checking for difference in regions in both dataframe and geojson data
# to use it as key
different_regs = set(geo_regs).difference(set(total_df['CCAA']))
different_regs
Out[8]:
{'ES.LO', 'ES.MU', 'ES.NA', 'ES.PM'}
In [9]:
# getting index of different regions in geojson data
# to delete them
for i in range(len(geo_data['features'])):
    idx = []
    if geo_data['features'][i]['properties']['hasc_1'] in list(different_regs):
        idx.append(i)
In [10]:
# delete different regions from geo_data
for i in idx:
    del geo_data['features'][i]['properties']['hasc_1']

Choropleth Map for Total Cases

In [11]:
# threshold scale for total cases
threshold_scale = np.linspace(total_df['CASOS'].min(),
                           total_df['CASOS'].max()+1, 6, dtype=int)
threshold_scale = threshold_scale.tolist()
threshold_scale
Out[11]:
[98, 10477, 20856, 31235, 41614, 51994]
In [12]:
# Choropleth map for total cases
spain_map = folium.Map([40.4637, -3.7492], zoom_start=6)

spain_map.choropleth(
    geo_data=geo_data,
    data=total_df,
    columns=['CCAA', 'CASOS'],
    key_on='properties.hasc_1',
    threshold_scale=threshold_scale,
    fill_color='OrRd',
    fill_opacity=0.8,
    line_opacity=0.3,
    legend_name='Total Covid19 Cases in Spain'
)

spain_map
Out[12]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Choropleth Map for Total PCR+

In [13]:
# threshold scale for total PCR+
threshold_scale = np.linspace(total_df['PCR+'].min(),
                           total_df['PCR+'].max()+1, 6, dtype=int)
threshold_scale = threshold_scale.tolist()
threshold_scale
Out[13]:
[100, 11844, 23588, 35332, 47076, 58820]
In [14]:
# Choropleth map for total PCR+
spain_map = folium.Map([40.4637, -3.7492], zoom_start=6)

spain_map.choropleth(
    geo_data=geo_data,
    data=total_df,
    columns=['CCAA', 'PCR+'],
    key_on='properties.hasc_1',
    threshold_scale=threshold_scale,
    fill_color='OrRd',
    fill_opacity=0.8,
    line_opacity=0.3,
    legend_name='Total PCR+ Covid19 Cases in Spain'
)

spain_map
Out[14]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Choropleth Map for Total Hospitalized

In [15]:
# threshold scale for total Hospitalized
threshold_scale = np.linspace(total_df['Hospitalizados'].min(),
                           total_df['Hospitalizados'].max()+1, 6, dtype=int)
threshold_scale = threshold_scale.tolist()
threshold_scale
Out[15]:
[10, 4834, 9658, 14482, 19306, 24131]
In [16]:
# Choropleth map for total Hospitalized
spain_map = folium.Map([40.4637, -3.7492], zoom_start=6)

spain_map.choropleth(
    geo_data=geo_data,
    data=total_df,
    columns=['CCAA', 'Hospitalizados'],
    key_on='properties.hasc_1',
    threshold_scale=threshold_scale,
    fill_color='OrRd',
    fill_opacity=0.8,
    line_opacity=0.3,
    legend_name='Total Hospitalized Covid19 Cases in Spain'
)

spain_map
Out[16]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Choropleth Map for Total deaths

In [17]:
# threshold scale for total deaths
threshold_scale = np.linspace(total_df['Fallecidos'].min(),
                           total_df['Fallecidos'].max()+1, 6, dtype=int)
threshold_scale = threshold_scale.tolist()
threshold_scale
Out[17]:
[2, 1571, 3140, 4710, 6279, 7849]
In [18]:
# Choropleth map for total deaths
spain_map = folium.Map([40.4637, -3.7492], zoom_start=6)

spain_map.choropleth(
    geo_data=geo_data,
    data=total_df,
    columns=['CCAA', 'Fallecidos'],
    key_on='properties.hasc_1',
    threshold_scale=threshold_scale,
    fill_color='OrRd',
    fill_opacity=0.8,
    line_opacity=0.3,
    legend_name='Total Covid19 Deaths in Spain'
)

spain_map
Out[18]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Choropleth Map for Total Recovered

In [19]:
# threshold scale for total recoveries
threshold_scale = np.linspace(total_df['Recuperados'].min(),
                           total_df['Recuperados'].max()+1, 6, dtype=int)
threshold_scale = threshold_scale.tolist()
threshold_scale
Out[19]:
[77, 7042, 14007, 20972, 27937, 34903]
In [20]:
# Choropleth map for total recoveries
spain_map = folium.Map([40.4637, -3.7492], zoom_start=6)

spain_map.choropleth(
    geo_data=geo_data,
    data=total_df,
    columns=['CCAA', 'Recuperados'],
    key_on='properties.hasc_1',
    threshold_scale=threshold_scale,
    fill_color='OrRd',
    fill_opacity=0.8,
    line_opacity=0.3,
    legend_name='Total Covid19 Recoveries in Spain'
)

spain_map
Out[20]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [20]: